# Load libraries
library(magrittr)
library(readr)
library(dplyr)
library(tidyr)
library(readxl)
library(stringr)
library(ggplot2)
library(gridExtra)
library(scales)
library(Cairo)
library(grid)
library(vcd)
library(countrycode)
library(maptools)
library(rgdal)
# Load data and add labels
# TODO: Use non-unicode-mangled data
load(file.path(PROJHOME, "data_raw", "responses_orgs.RData"))
load(file.path(PROJHOME, "data_raw", "responses_countries.RData"))
responses.orgs.labs <- read_csv(file.path(PROJHOME, "data_raw", "response_orgs_labels.csv"))
responses.countries.labs <- read_csv(file.path(PROJHOME, "data_raw", "response_countries_labels.csv"))
Hmisc::label(responses.orgs, self=FALSE) <- responses.orgs.labs$varlabel
Hmisc::label(responses.countries, self=FALSE) <- responses.countries.labs$varlabel
# Add surevy sources
phone <- readRDS(file.path(PROJHOME, "data_raw", "phone.rds"))
linkedin <- readRDS(file.path(PROJHOME, "data_raw", "linkedin.rds"))
responses.orgs %<>%
mutate(survey.method = ifelse(survey.id %in% phone, "Phone",
ifelse(survey.id %in% linkedin, "LinkedIn",
"Online")))
# Add a nicer short ID to each response to reference in the article
set.seed(1234)
nicer.ids <- data_frame(survey.id = responses.orgs$survey.id) %>%
mutate(clean.id = 1000 + sample(1:n()))
write_csv(nicer.ids, path=file.path(PROJHOME, "data", "id_lookup_WILL_BE_OVERWRITTEN.csv"))
The data is split into two sets: responses.orgs has a row for each surveyed organization and responses.countries has a row for each country organizations responded for (1-4 countries per organization). For ease of analysis, this combines them into one larger dataframe (so organization-level data is repeated). It also removes columns that were added manually, where an RA coded whether a country was mentioned in different questions (with a colum for each country!).
responses.all <- responses.orgs %>%
left_join(responses.countries, by="survey.id") %>%
select(-contains("_c", ignore.case=FALSE)) %>% # Get rid of all the dummy vars
left_join(nicer.ids, by="survey.id") %>%
select(survey.id, clean.id, everything())
Convert some responses into numeric indexes:
importance <- data_frame(Q3.19 = levels(responses.countries$Q3.19),
importance = c(2, 1, 0, NA))
importance.levels <- data_frame(start=c(0, 1, 2),
end=c(1, 2, 3),
level=c("Not important", "Somewhat important",
"Most important"),
level.ordered=factor(level, levels=level, ordered=TRUE))
positivity <- data_frame(Q3.25 = levels(responses.countries$Q3.25),
positivity = c(-1, 1, 0, NA))
improvement <- data_frame(Q3.26 = levels(responses.countries$Q3.26),
improvement = c(1, 0, -1, NA))
# Cho data
# TODO: Someday get this directly from the internet, like Freedom House data
# http://www.economics-human-trafficking.org/data-and-reports.html
tip.change <- read_csv(file.path(PROJHOME, "data", "policy_index.csv")) %>%
group_by(countryname) %>%
summarise(avg_tier = mean(tier, na.rm=TRUE),
improve_tip = (last(na.omit(tier), default=NA) -
first(na.omit(tier), default=NA)),
change_policy = last(na.omit(p), default=NA) -
first(na.omit(p), default=NA)) %>%
mutate(countryname = countrycode(countryname, "country.name", "country.name"))
# Democracy (Freedom House)
fh.url <- "https://freedomhouse.org/sites/default/files/Individual%20Country%20Ratings%20and%20Status%2C%201973-2015%20%28FINAL%29.xls"
fh.tmp <- paste0(tempdir(), basename(fh.url))
download.file(fh.url, fh.tmp)
fh.raw <- read_excel(fh.tmp, skip=6)
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10
# Calculate the number of years covered in the data (each year has three columns)
num.years <- (ncol(fh.raw) - 1)/3
# Create combinations of all the variables and years
var.years <- expand.grid(var = c('PR', 'CL', 'Status'),
year = 1972:(1972 + num.years - 1))
colnames(fh.raw) <- c('country', paste(var.years$var, var.years$year, sep="_"))
# Split columns and convert to long
fh <- fh.raw %>%
gather(var.year, value, -country) %>%
separate(var.year, into=c("indicator", "year"), sep="_") %>%
filter(!is.na(country)) %>%
spread(indicator, value) %>%
mutate(year = as.numeric(year),
CL = suppressWarnings(as.integer(CL)),
PR = suppressWarnings(as.integer(PR)),
Status = factor(Status, levels=c("NF", "PF", "F"),
labels=c("Not free", "Partially free", "Free"),
ordered=TRUE),
total.freedom = CL + PR,
country.clean = countrycode(country, "country.name", "country.name")) %>%
filter(!is.na(CL) & !is.na(PR)) %>%
# All the cases we're interested in are after 2000, so we can remove these
# problematic double countries
filter(!(country %in% c("Germany, E.", "Germany, W.", "USSR", "Vietnam, N.",
"Vietnam, S.", "Yemen, N.", "Yemen, S."))) %>%
# Again, because we only care about post-2000 Serbia, merge with Yugoslavia
mutate(country.clean = ifelse(country.clean == "Yugoslavia",
"Serbia", country.clean)) %>%
select(-country, country=country.clean)
fh.summary <- fh %>%
filter(year >= 2000) %>%
group_by(country) %>%
summarize(total.freedom = mean(total.freedom, na.rm=TRUE))
# Funding
funding.raw <- read_csv(file.path(PROJHOME, "data_raw", "funding_clean.csv")) %>%
mutate(cowcode = ifelse(country == "Serbia", 555, cowcode),
countryname = countrycode(cowcode, "cown", "country.name"),
countryname = ifelse(cowcode == 555, "Serbia", countryname)) %>%
filter(!is.na(countryname))
funding.all <- funding.raw %>%
group_by(countryname) %>%
summarise(total.funding = sum(amount, na.rm=TRUE),
avg.funding = mean(amount, na.rm=TRUE))
funding.ngos <- funding.raw %>%
filter(recipient_type %in% c("NGO", "NPO")) %>%
group_by(countryname) %>%
summarise(total.funding.ngos = sum(amount, na.rm=TRUE),
avg.funding.ngos = mean(amount, na.rm=TRUE))
responses.full <- responses.all %>%
filter(work.only.us != "Yes") %>%
mutate(work.country.clean = countrycode(work.country,
"country.name", "country.name"),
work.country.clean = ifelse(is.na(work.country),
"Global", work.country.clean),
work.country = work.country.clean) %>%
left_join(tip.change, by=c("work.country" = "countryname")) %>%
left_join(funding.all, by=c("work.country" = "countryname")) %>%
left_join(funding.ngos, by=c("work.country" = "countryname")) %>%
left_join(fh.summary, by=c("work.country" = "country")) %>%
left_join(positivity, by = "Q3.25") %>%
left_join(importance, by = "Q3.19") %>%
left_join(improvement, by = "Q3.26") %>%
mutate(received.funding = ifelse(Q3.18_3 != 1 | is.na(Q3.18_3), FALSE, TRUE),
us.involvement = ifelse(Q3.18_5 != 1 | is.na(Q3.18_5), TRUE, FALSE),
us.hq = ifelse(home.country == "United States", TRUE, FALSE),
Q3.19 = factor(Q3.19, levels=c("Most important actor",
"Somewhat important actor",
"Not an important actor",
"Don't know"),
ordered=TRUE),
Q3.25_collapsed = ifelse(Q3.25 == "Negative", NA, Q3.25))
country.indexes <- responses.full %>%
filter(!is.na(work.country)) %>%
group_by(work.country) %>%
# Needs mutate + mutate_each + select + unique because you can't mix
# summarise + summarise_each. See http://stackoverflow.com/a/31815540/120898
mutate(num.responses = n()) %>%
mutate_each(funs(score = mean(., na.rm=TRUE), stdev = sd(., na.rm=TRUE),
n = sum(!is.na(.))),
c(positivity, importance, improvement)) %>%
select(work.country, num.responses, matches("positivity_|importance_|improvement_")) %>%
unique %>%
ungroup() %>%
arrange(desc(num.responses))
# Load map data
if (!file.exists(file.path(PROJHOME, "data_external", "map_data",
"ne_110m_admin_0_countries.VERSION.txt"))) {
map.url <- paste0("http://www.naturalearthdata.com/",
"http//www.naturalearthdata.com/download/110m/cultural/",
"ne_110m_admin_0_countries.zip")
map.tmp <- file.path(PROJHOME, "data_external", basename(map.url))
download.file(map.url, map.tmp)
unzip(map.tmp, exdir=file.path(PROJHOME, "data_external", "map_data"))
unlink(map.tmp)
}
countries.map <- readOGR(file.path(PROJHOME, "data_external", "map_data"),
"ne_110m_admin_0_countries")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/andrew/Research/••Projects/Human trafficking/Anti-TIP NGOs and the US/data_external/map_data", layer: "ne_110m_admin_0_countries"
## with 177 features
## It has 63 fields
countries.robinson <- spTransform(countries.map, CRS("+proj=robin"))
countries.ggmap <- fortify(countries.robinson, region="iso_a3") %>%
filter(!(id %in% c("ATA", -99))) %>% # Get rid of Antarctica and NAs
mutate(id = ifelse(id == "GRL", "DNK", id)) # Greenland is part of Denmark
# All possible countries (to fix the South Sudan issue)
possible.countries <- data_frame(id = unique(as.character(countries.ggmap$id)))
# Save data
# TODO: Make responses.full more anonymous before making it public
# TODO: Save as CSV and Stata too instead of just R
saveRDS(responses.full, file.path(PROJHOME, "data", "responses_full.rds"))
saveRDS(country.indexes, file.path(PROJHOME, "data", "country_indexes.rds"))
# Useful functions
theme_clean <- function(base_size=9, base_family="Source Sans Pro Light") {
ret <- theme_bw(base_size, base_family) +
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
panel.border = element_blank(), axis.line=element_blank(),
panel.grid.minor=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(size=0.25, colour="grey90"),
axis.ticks=element_blank(),
legend.position="bottom",
axis.title=element_text(size=rel(0.8), family="Source Sans Pro Semibold"),
strip.text=element_text(size=rel(0.9), family="Source Sans Pro Semibold"),
strip.background=element_rect(fill="#ffffff", colour=NA),
panel.margin=unit(1, "lines"), legend.key.size=unit(.7, "line"),
legend.key=element_blank())
ret
}
# For maps
theme_blank_map <- function(base_size=12, base_family="Source Sans Pro Light") {
ret <- theme_bw(base_size, base_family) +
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
panel.border=element_blank(), axis.line=element_blank(),
panel.grid=element_blank(), axis.ticks=element_blank(),
axis.title=element_blank(), axis.text=element_blank(),
legend.text=element_text(size=rel(0.7), family="Source Sans Pro Light"),
legend.title=element_text(size=rel(0.7), family="Source Sans Pro Semibold"),
strip.text=element_text(size=rel(1), family="Source Sans Pro Semibold"))
ret
}
# Return a data frame of counts and proportions for multiple responses
separate.answers.summary <- function(df, cols, labels, total=FALSE) {
cols.to.select <- which(colnames(df) %in% cols)
denominator <- df %>%
select(cols.to.select) %>%
mutate(num.answered = rowSums(., na.rm=TRUE)) %>%
filter(num.answered > 0) %>%
nrow()
df <- df %>%
select(survey.id, cols.to.select) %>%
gather(question, value, -survey.id) %>%
mutate(question = factor(question, labels=labels, ordered=TRUE)) %>%
group_by(question) %>%
summarize(response = sum(value, na.rm=TRUE),
pct = round(response / denominator * 100, 2),
plot.pct = response / denominator)
colnames(df) <- c("Answer", "Responses", "%", "plot.pct")
if (total) {
df <- df %>% select(1:3)
df <- rbind(as.matrix(df), c("Total responses", denominator, "—"))
}
return(list(df=df, denominator=denominator))
}
# Create a character vector of significance stars
add.stars <- function(x) {
as.character(symnum(x, corr = FALSE,
cutpoints = c(0, .001,.01,.05, .1, 1),
symbols = c("***","**","*","."," ")))
}
TODO: Response rate How many times did respondents loop through the country questions?
responses.full %>%
group_by(survey.id) %>%
summarise(loops = n()) %>%
do(data.frame(table(.$loops, dnn="Countries"))) %>%
mutate(prop = Freq / sum(Freq))
## Countries Freq prop
## 1 1 415 0.86458333
## 2 2 52 0.10833333
## 3 3 10 0.02083333
## 4 4 3 0.00625000
How did respondents take the survey?
responses.full %>%
group_by(survey.id) %>%
slice(1) %>% # Select each organization's first country
group_by(survey.method) %>%
summarise(num = n()) %>%
mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
##
## survey.method num prop
## (chr) (int) (dbl)
## 1 LinkedIn 3 0.00625000
## 2 Online 463 0.96458333
## 3 Phone 14 0.02916667
How many respondents answered at least one free response question?
responses.full %>%
select(Q3.10:Q3.17, Q3.24.Text, Q3.30, Q4.1) %T>%
{cat("Number of free response questions:", ncol(.), "\n")} %>%
rowwise() %>% do(wrote.something = !all(is.na(.))) %>%
ungroup() %>% mutate(wrote.something = unlist(wrote.something)) %>%
bind_cols(select(responses.full, survey.id)) %>%
group_by(survey.id) %>%
summarise(wrote.something = ifelse(sum(wrote.something) > 0, TRUE, FALSE)) %T>%
{cat("Number responses:", nrow(.), "\n")} %>%
do(as.data.frame(table(.$wrote.something, dnn="Wrote something"))) %>%
mutate(Percent = Freq / sum(Freq))
## Number of free response questions: 12
## Number responses: 480
## Wrote.something Freq Percent
## 1 FALSE 69 0.14375
## 2 TRUE 411 0.85625
Where are these NGOs based?
hq.countries <- responses.full %>%
group_by(survey.id) %>%
slice(1) %>%
ungroup() %>%
mutate(id = countrycode(home.country, "country.name", "iso3c"),
id = ifelse(home.country == "Kosovo", "KOS", id)) %>%
group_by(id) %>%
summarize(num.ngos = n()) %>%
ungroup() %>%
right_join(possible.countries, by="id") %>%
mutate(num.ceiling = ifelse(num.ngos >= 10, 10, num.ngos)) %>%
arrange(desc(num.ngos)) %T>%
{print(head(.))}
## Source: local data frame [6 x 3]
##
## id num.ngos num.ceiling
## (chr) (int) (dbl)
## 1 USA 56 10
## 2 IND 27 10
## 3 NGA 25 10
## 4 GBR 21 10
## 5 CAN 14 10
## 6 THA 10 10
hq.map <- ggplot(hq.countries, aes(fill=num.ceiling, map_id=id)) +
geom_map(map=countries.ggmap, size=0.15, colour="black") +
expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) +
coord_equal() +
scale_fill_gradient(low="grey95", high="grey20", breaks=seq(2, 10, 2),
labels=c(paste(seq(2, 8, 2), " "), "10+"),
na.value="#FFFFFF", name="NGOs based in country",
guide=guide_colourbar(ticks=FALSE, barwidth=6)) +
theme_blank_map() +
theme(legend.position="bottom", legend.key.size=unit(0.65, "lines"),
strip.background=element_rect(colour="#FFFFFF", fill="#FFFFFF"))
hq.map
Where do these NGOs work?
work.countries <- responses.full %>%
mutate(id = countrycode(work.country, "country.name", "iso3c"),
id = ifelse(work.country == "Kosovo", "KOS", id)) %>%
group_by(id) %>%
summarize(num.ngos = n()) %>%
ungroup() %>%
right_join(possible.countries, by="id") %>%
mutate(num.ceiling = ifelse(num.ngos >= 10, 10, num.ngos)) %>%
arrange(desc(num.ngos)) %T>%
{print(head(.))}
## Source: local data frame [6 x 3]
##
## id num.ngos num.ceiling
## (chr) (int) (dbl)
## 1 IND 39 10
## 2 NGA 33 10
## 3 GBR 21 10
## 4 NPL 18 10
## 5 PHL 15 10
## 6 THA 15 10
work.map <- ggplot(work.countries, aes(fill=num.ceiling, map_id=id)) +
geom_map(map=countries.ggmap, size=0.15, colour="black") +
expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) +
coord_equal() +
scale_fill_gradient(low="grey95", high="grey20", breaks=seq(2, 10, 2),
labels=c(paste(seq(2, 8, 2), " "), "10+"),
na.value="#FFFFFF", name="NGOs working in country",
guide=guide_colourbar(ticks=FALSE, barwidth=6)) +
theme_blank_map() +
theme(legend.position="bottom", legend.key.size=unit(0.65, "lines"),
strip.background=element_rect(colour="#FFFFFF", fill="#FFFFFF"))
work.map
Combined maps
fig.maps <- arrangeGrob(hq.map, work.map, nrow=1)
ggsave(fig.maps, filename=file.path(PROJHOME, "figures", "fig_maps.pdf"),
width=6, height=3, units="in", device=cairo_pdf, scale=1.5)
ggsave(fig.maps, filename=file.path(PROJHOME, "figures", "fig_maps.png"),
width=6, height=3, units="in", scale=1.5)
Do members of the government or ruling party sit on the NGO’s board?
responses.full %>%
group_by(Q3.27) %>%
summarise(num = n()) %>%
filter(!is.na(Q3.27)) %>%
mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
##
## Q3.27 num prop
## (fctr) (int) (dbl)
## 1 No 475 0.86837294
## 2 Yes 41 0.07495430
## 3 Don't know 31 0.05667276
For those that said yes, is the NGO required to have a government official sit on the board?
responses.full %>%
group_by(Q3.28) %>%
summarise(num = n()) %>%
filter(!is.na(Q3.28)) %>%
mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
##
## Q3.28 num prop
## (fctr) (int) (dbl)
## 1 No 25 0.625
## 2 Yes 13 0.325
## 3 Don't know 2 0.050
How restricted does the NGO feel by the host government?
responses.full %>%
group_by(Q3.29) %>%
summarise(num = n()) %>%
filter(!is.na(Q3.29)) %>%
mutate(prop = num / sum(num)) %T>%
{cat("Number of responses:", sum(.$num), "\n")}
## Number of responses: 547
## Source: local data frame [6 x 3]
##
## Q3.29 num prop
## (fctr) (int) (dbl)
## 1 Not restricted 201 0.36745887
## 2 Very little restricted 118 0.21572212
## 3 A little restricted 51 0.09323583
## 4 Somewhat restricted 93 0.17001828
## 5 Very restricted 41 0.07495430
## 6 Don't know 43 0.07861060
Which countries do NGOs say are restrictive?
responses.full %>%
filter(Q3.29 %in% c("Somewhat restricted", "Very restricted")) %>%
group_by(work.country) %>%
summarise(num = n()) %>%
arrange(desc(num)) %T>%
{cat("Number of countries:", nrow(.), "\n")}
## Number of countries: 66
## Source: local data frame [66 x 2]
##
## work.country num
## (chr) (int)
## 1 India 11
## 2 Nigeria 7
## 3 Nepal 6
## 4 Thailand 6
## 5 Cambodia 5
## 6 Congo, the Democratic Republic of the 4
## 7 United Kingdom 4
## 8 Australia 3
## 9 Iraq 3
## 10 Italy 3
## .. ... ...
# Select just the columns that have cowcodes embedded in them
active.embassies.raw <- responses.countries %>%
select(contains("_c", ignore.case=FALSE)) %>%
mutate_each(funs(as.numeric(levels(.))[.])) # Convert values to numeric
# Select only the rows where they responded (i.e. not all columns are NA)
num.responses <- active.embassies.raw %>%
rowwise() %>% do(all.missing = all(!is.na(.))) %>%
ungroup() %>% mutate(all.missing = unlist(all.missing)) %>%
summarise(total = sum(all.missing))
# Tidy cowcode columns and summarize most commonly mentioned countries
active.embassies <- active.embassies.raw %>%
gather(country.raw, num) %>%
group_by(country.raw) %>% summarise(num = sum(num, na.rm=TRUE)) %>%
mutate(country.raw = str_replace(country.raw, "Q.*c", ""),
country = countrycode(country.raw, "cown", "country.name"),
country = ifelse(country.raw == "2070", "European Union", country)) %>%
ungroup() %>% mutate(prop = num / num.responses$total,
prop.nice = sprintf("%.1f%%", prop * 100))
Which embassies or foreign governments NGOs were reported as active partners in the fight against human trafficking?
active.embassies.top <- active.embassies %>%
arrange(num) %>% select(-country.raw) %>%
filter(num > 10) %>%
mutate(country = factor(country, levels=country, ordered=TRUE)) %>%
arrange(desc(num))
active.embassies %>% arrange(desc(num)) %>%
select(-country.raw) %>% filter(num > 10)
## Source: local data frame [12 x 4]
##
## num country prop prop.nice
## (dbl) (chr) (dbl) (chr)
## 1 260 United States 0.76923077 76.9%
## 2 43 United Kingdom 0.12721893 12.7%
## 3 35 Netherlands 0.10355030 10.4%
## 4 30 France 0.08875740 8.9%
## 5 26 Norway 0.07692308 7.7%
## 6 26 European Union 0.07692308 7.7%
## 7 24 Switzerland 0.07100592 7.1%
## 8 21 Sweden 0.06213018 6.2%
## 9 19 Australia 0.05621302 5.6%
## 10 17 Germany 0.05029586 5.0%
## 11 17 Italy 0.05029586 5.0%
## 12 12 Canada 0.03550296 3.6%
nrow(active.embassies) # Number of countries mentioned
## [1] 64
num.responses$total # Total responses
## [1] 338
# Most active embassies
# Save Q3.7 to a CSV for hand coding
most.active <- responses.countries %>%
select(Q3.7) %>%
filter(!is.na(Q3.7))
write_csv(most.active, path=file.path(PROJHOME, "data",
"most_active_WILL_BE_OVERWRITTEN.csv"))
# Read in hand-coded CSV
if (file.exists(file.path(PROJHOME, "data", "most_active.csv"))) {
most.active <- read_csv(file.path(PROJHOME, "data", "most_active.csv"))
} else {
stop("data/most_active.csv is missing")
}
# Split comma-separated countries, unnest them into multiple rows, and
# summarize most active countries
most.active.clean <- most.active %>%
transform(clean = strsplit(clean, ",")) %>%
unnest(clean) %>%
mutate(clean = str_trim(clean)) %>%
group_by(clean) %>%
summarise(total = n()) %>%
mutate(prop = total / nrow(most.active),
prop.nice = sprintf("%.1f%%", prop * 100))
Which countries or embassies have been the most active?
most.active.clean %>% arrange(desc(total))
## Source: local data frame [40 x 4]
##
## clean total prop prop.nice
## (chr) (int) (dbl) (chr)
## 1 United States 188 0.70149254 70.1%
## 2 None 16 0.05970149 6.0%
## 3 European Union 14 0.05223881 5.2%
## 4 All 12 0.04477612 4.5%
## 5 Switzerland 8 0.02985075 3.0%
## 6 Australia 7 0.02611940 2.6%
## 7 Italy 7 0.02611940 2.6%
## 8 United Kingdom 7 0.02611940 2.6%
## 9 Netherlands 6 0.02238806 2.2%
## 10 Norway 6 0.02238806 2.2%
## .. ... ... ... ...
nrow(most.active.clean) - 1 # Subtract one because of "None"s
## [1] 39
Over the last 10–15 years, has the United States or its embassy been active in the fight against human trafficking in X?
responses.countries$Q3.8 %>% table %>% print %>% prop.table
## .
## No Yes Don't know
## 39 344 150
## .
## No Yes Don't know
## 0.07317073 0.64540338 0.28142589
Side-by-side graph of active countries + most active countries
plot.data <- active.embassies.top %>%
bind_rows(data_frame(num=0, country=c("All", "None"),
prop=0, prop.nice="")) %>%
arrange(num) %>%
mutate(country = factor(country, levels=country, ordered=TRUE))
plot.data.active <- most.active.clean %>%
filter(clean %in% plot.data$country) %>%
mutate(country = factor(clean, levels=levels(plot.data$country), ordered=TRUE))
fig.active <- ggplot(plot.data, aes(x=country, y=num)) +
geom_bar(stat="identity") +
geom_text(aes(label = prop.nice), size=3.5, hjust=1.3,
family="Source Sans Pro Light") +
labs(x=NULL, y="Number of times country was mentioned as a partner in anti-TIP work") +
scale_y_continuous(breaks=seq(0, max(active.embassies$num), by=25),
trans="reverse", expand = c(.1, .1)) +
coord_flip() +
theme_clean() +
theme(axis.text.y = element_blank(),
axis.line.y = element_blank(),
plot.margin = unit(c(1,0.5,1,1), "lines"))
fig.most.active <- ggplot(plot.data.active, aes(x=country, y=total)) +
geom_bar(stat="identity") +
geom_text(aes(label = prop.nice), size=3.5, hjust=-0.3,
family="Source Sans Pro Light") +
labs(x=NULL, y="Number of times country was mentioned as the most active partner in anti-TIP work") +
scale_y_continuous(expand = c(.15, .15)) +
coord_flip() +
theme_clean() +
theme(axis.text.y = element_text(hjust=0.5),
axis.line.y = element_blank(),
plot.margin = unit(c(1,1,1,0), "lines"))
fig.embassies <- arrangeGrob(fig.active, fig.most.active, nrow=1)
grid.draw(fig.embassies)
ggsave(fig.embassies, filename=file.path(PROJHOME, "figures", "fig_embassies.pdf"),
width=5, height=2, units="in", device=cairo_pdf, scale=2.5)
ggsave(fig.embassies, filename=file.path(PROJHOME, "figures", "fig_embassies.png"),
width=5, height=2, units="in", scale=2.5)
saveRDS(active.embassies, file.path(PROJHOME, "data", "active_embassies.rds"))
saveRDS(most.active.clean, file.path(PROJHOME, "data", "most_active_embassies.rds"))
Actual US activities
cols <- c("Q3.9_1", "Q3.9_2", "Q3.9_3", "Q3.9_4", "Q3.9_5",
"Q3.9_6", "Q3.9_7", "Q3.9_8", "Q3.9_9", "Q3.9_10")
labels <- c("Asking for legislation", "Convening conferences or workshops",
"Raising awareness", "Providing resources or funding",
"Increasing government attention", "Training government officials",
"Contributing to a government action plan", "Other", "Don't know",
"The US has not been involved in trafficking issues")
activities <- separate.answers.summary(responses.countries, cols, labels)
activities$denominator # Number of responses
## [1] 532
activities$df
## Source: local data frame [10 x 4]
##
## Answer Responses % plot.pct
## (fctr) (int) (dbl) (dbl)
## 1 Asking for legislation 165 31.02 0.31015038
## 2 Convening conferences or workshops 208 39.10 0.39097744
## 3 Raising awareness 214 40.23 0.40225564
## 4 Providing resources or funding 212 39.85 0.39849624
## 5 Increasing government attention 217 40.79 0.40789474
## 6 Training government officials 146 27.44 0.27443609
## 7 Contributing to a government action plan 114 21.43 0.21428571
## 8 Other 43 8.08 0.08082707
## 9 Don't know 26 4.89 0.04887218
## 10 The US has not been involved in trafficking issues 166 31.20 0.31203008
plot.data <- activities$df %>%
mutate(Answer=factor(Answer, levels=rev(labels), ordered=TRUE))
fig.activities <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
geom_bar(aes(y=plot.pct), stat="identity") +
labs(x=NULL, y=NULL) +
scale_y_continuous(labels = percent,
breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) +
coord_flip() + theme_clean()
fig.activities
ggsave(fig.activities, filename=file.path(PROJHOME, "figures", "fig_activities.pdf"),
width=6.5, height=5, units="in", device=cairo_pdf)
ggsave(fig.activities, filename=file.path(PROJHOME, "figures", "fig_activities.png"),
width=6.5, height=5, units="in")
plot.data <- responses.full %>%
group_by(Q3.19) %>%
summarize(num = n()) %>%
na.omit() %>%
mutate(prop = num / sum(num),
prop.nice = sprintf("%.1f%%", prop * 100),
Q3.19 = factor(Q3.19, levels=rev(levels(Q3.19)), ordered=TRUE))
plot.data
## Source: local data frame [4 x 4]
##
## Q3.19 num prop prop.nice
## (fctr) (int) (dbl) (chr)
## 1 Most important actor 139 0.2662835 26.6%
## 2 Somewhat important actor 182 0.3486590 34.9%
## 3 Not an important actor 68 0.1302682 13.0%
## 4 Don't know 133 0.2547893 25.5%
fig.us_importance <- ggplot(plot.data, aes(x=Q3.19, y=prop)) +
geom_bar(stat="identity") +
labs(x=NULL, y=NULL) +
scale_y_continuous(labels = percent,
breaks = seq(0, max(round(plot.data$num, 1)), by=0.1)) +
coord_flip() + theme_clean()
fig.us_importance
ggsave(fig.us_importance, filename=file.path(PROJHOME, "figures", "fig_us_importance.pdf"),
width=6.5, height=5, units="in", device=cairo_pdf)
ggsave(fig.us_importance, filename=file.path(PROJHOME, "figures", "fig_us_importance.png"),
width=6.5, height=5, units="in")
Average importance by country
importance.plot <- country.indexes %>%
filter(num.responses >= 10) %>%
arrange(importance_score) %>%
mutate(country_label = factor(work.country, levels=unique(work.country),
labels=paste0(work.country, " (", num.responses, ")"),
ordered=TRUE))
fig.avg_importance <- ggplot(importance.plot, aes(x=country_label, y=importance_score)) +
geom_rect(data=importance.levels, aes(x=NULL, y=NULL, ymin=start, ymax=end,
xmin=0, xmax=Inf, fill=level.ordered), alpha=0.5) +
geom_pointrange(aes(ymax=importance_score + importance_stdev,
ymin=importance_score - importance_stdev)) +
labs(x="Country (number of responses)",
y="Importance of the US in anti-TIP efforts (mean)") +
scale_fill_manual(values=c("grey90", "grey60", "grey30"), name=NULL) +
coord_flip() +
theme_clean() + theme(legend.position="bottom")
fig.avg_importance
ggsave(fig.avg_importance, filename=file.path(PROJHOME, "figures", "fig_avg_importance.pdf"),
width=6.5, height=5, units="in", device=cairo_pdf)
ggsave(fig.avg_importance, filename=file.path(PROJHOME, "figures", "fig_avg_importance.png"),
width=6.5, height=5, units="in")
df.importance <- responses.full %>%
select(Q3.19, work.country, change_policy, avg_tier, improve_tip, change_policy,
importance, received.funding, us.involvement, total.funding,
total.freedom, us.hq, time.spent=Q2.1) %>%
filter(!is.na(Q3.19)) %>%
mutate(importance_factor = factor(Q3.19, ordered=FALSE),
log.total.funding = log1p(total.funding),
time.spent = as.numeric(time.spent))
Average tier doesn’t show much because it doesn’t show any changes in time—just how bad the country is in general?
importance.means <- df.importance %>%
group_by(Q3.19) %>%
summarize(avg_points = mean(avg_tier, na.rm=TRUE),
var_points = var(avg_tier, na.rm=TRUE)) %>%
print
## Source: local data frame [4 x 3]
##
## Q3.19 avg_points var_points
## (fctr) (dbl) (dbl)
## 1 Most important actor 2.053344 0.1171988
## 2 Somewhat important actor 1.920723 0.2256463
## 3 Not an important actor 1.763046 0.3492590
## 4 Don't know 1.744558 0.2864291
Plot group means and distributions
fig.importance <- ggplot(df.importance, aes(x=Q3.19, y=avg_tier)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05, show.legend=FALSE) +
geom_point(data=importance.means, aes(x=Q3.19, y=avg_points), size=5, show.legend=FALSE) +
labs(x="Opinion of US importance", y="Average TIP tier rating") +
coord_flip() + theme_clean()
fig.importance
Those means appear slightly different from each other. Is that really the case? Check with ANOVA, which assumes homogenous variance across groups. Throw every possible test at it—if null is rejected (p < 0.05 or whatever) then variance is likely heterogenous: (helpful reference)
bartlett.test(avg_tier ~ importance_factor, data=df.importance)
##
## Bartlett test of homogeneity of variances
##
## data: avg_tier by importance_factor
## Bartlett's K-squared = 35.422, df = 3, p-value = 9.923e-08
car::leveneTest(avg_tier ~ importance_factor, data=df.importance)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 14.266 6.052e-09 ***
## 517
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fligner.test(avg_tier ~ importance_factor, data=df.importance) # Uses median
##
## Fligner-Killeen test of homogeneity of variances
##
## data: avg_tier by importance_factor
## Fligner-Killeen:med chi-squared = 28.052, df = 3, p-value = 3.542e-06
kruskal.test(avg_tier ~ importance_factor, data=df.importance) # Nonparametric
##
## Kruskal-Wallis rank sum test
##
## data: avg_tier by importance_factor
## Kruskal-Wallis chi-squared = 23.591, df = 3, p-value = 3.041e-05
All of those p-values are tiny, so it’s clear that variance is not the same across groups. However, there’s a rule of thumb (super detailed example) that ANOVA is robust to heterogeneity of variance as long as the largest variance is less than four times the smallest variance.
Given that rule of thumb, the variance here isn’t that much of an issue
df.importance %>% group_by(importance_factor) %>%
summarise(variance = var(avg_tier, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 2.980057
It would be cool to use Bayesian ANOVA to account for non-homogenous variances (see John Kruschke’s evangelizing), since it handles violations of ANOVA assumptions nicely. However, in his example, the ratio of min/max variance is huge, so it does lead to big differences in results:
#
# read_csv("http://www.indiana.edu/~kruschke/DoingBayesianDataAnalysis/Programs/NonhomogVarData.csv") %>%
# group_by(Group) %>%
# summarise(variance = var(Y)) %>%
# do(data_frame(ratio = max(.$variance) / min(.$variance)))
# # ratio = 64
#
With the variance issue handled, run the ANOVA:
importance.aov <- aov(avg_tier ~ importance_factor, data=df.importance)
summary(importance.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 7.79 2.5963 11.38 3.1e-07 ***
## Residuals 517 118.00 0.2282
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
There is some significant difference between groups. Look at pairwise comparisons between all the groups to (kind of) decompose that finding:
(importance.pairs <- TukeyHSD(importance.aov, "importance_factor"))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = avg_tier ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor -0.1326211 -0.2714899 0.006247627 0.0673115
## Not an important actor-Most important actor -0.2902984 -0.4725198 -0.108076873 0.0002725
## Don't know-Most important actor -0.3087859 -0.4581435 -0.159428195 0.0000009
## Not an important actor-Somewhat important actor -0.1576772 -0.3328160 0.017461509 0.0947138
## Don't know-Somewhat important actor -0.1761647 -0.3167941 -0.035535362 0.0072161
## Don't know-Not an important actor -0.0184875 -0.2020542 0.165079256 0.9938662
Plot the differences:
importance.pairs.plot <- data.frame(importance.pairs$importance_factor) %>%
mutate(pair = row.names(.),
pair = factor(pair, levels=pair, ordered=TRUE),
stars = add.stars(p.adj))
fig.importance.pairs <- ggplot(importance.pairs.plot,
aes(x=pair, y=diff, ymax=upr, ymin=lwr)) +
geom_hline(yintercept=0) +
geom_text(aes(label=stars), nudge_x=0.25) +
geom_pointrange() +
theme_clean() + coord_flip()
fig.importance.pairs
Another way of checking group means in non-homogenous data is to use ordinal logistic regression. Here’s an ordered logit and corresponding predicted probabilities:
model.importance <- ordinal::clm(Q3.19 ~ avg_tier, data=df.importance,
link="logit", Hess=TRUE)
summary(model.importance)
## formula: Q3.19 ~ avg_tier
## data: df.importance
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 521 -679.73 1367.46 6(0) 1.69e-12 2.1e+02
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## avg_tier -0.8863 0.1618 -5.479 4.27e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Most important actor|Somewhat important actor -2.7248 0.3305 -8.244
## Somewhat important actor|Not an important actor -1.1795 0.3119 -3.782
## Not an important actor|Don't know -0.5470 0.3092 -1.769
## (1 observation deleted due to missingness)
# Predicted probabilities
newdata <- data_frame(avg_tier = seq(0, 3, 0.1))
pred.importance <- predict(model.importance, newdata, interval=TRUE)
# Create plot data
pred.plot.lower <- cbind(newdata, pred.importance$lwr) %>%
gather(importance, lwr, -c(1:ncol(newdata)))
pred.plot.upper <- cbind(newdata, pred.importance$upr) %>%
gather(importance, upr, -c(1:ncol(newdata)))
pred.plot.data <- cbind(newdata, pred.importance$fit) %>%
gather(importance, importance_prob, -c(1:ncol(newdata))) %>%
left_join(pred.plot.lower, by=c("avg_tier", "importance")) %>%
left_join(pred.plot.upper, by=c("avg_tier", "importance"))
importance.colors <- c("grey20", "grey40", "grey60", "grey80")
ggplot(pred.plot.data, aes(x=avg_tier, y=importance_prob)) +
geom_ribbon(aes(ymax=upr, ymin=lwr, fill=importance),
alpha=0.2) +
geom_line(aes(colour=importance), size=2) +
scale_y_continuous(labels=percent) +
labs(x="Average tier rating in country",
y="Predicted probability of assigning importance") +
# scale_fill_manual(values=importance.colors, name=NULL) +
# scale_colour_manual(values=importance.colors, name=NULL) +
theme_clean()
Opinions of importance are not related to changes in TIP score. The average Improvement in TIP rating is the same for each possible answer of importance.
ggplot(df.importance, aes(x=Q3.19, y=improve_tip)) +
geom_violin(fill="grey90") +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Improvement in TIP tier rating") +
coord_flip() + theme_clean()
Variance is equal in all groups:
kruskal.test(improve_tip ~ importance_factor, data=df.importance)
##
## Kruskal-Wallis rank sum test
##
## data: improve_tip by importance_factor
## Kruskal-Wallis chi-squared = 0.098657, df = 3, p-value = 0.992
ANOVA shows no differences:
change.anova <- aov(improve_tip ~ importance_factor, data=df.importance)
summary(change.anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 0.16 0.05178 0.183 0.908
## Residuals 517 146.15 0.28270
## 1 observation deleted due to missingness
TukeyHSD(change.anova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = improve_tip ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor 0.033069677 -0.1214814 0.1876208 0.9461281
## Not an important actor-Most important actor 0.045175624 -0.1576240 0.2479753 0.9397810
## Don't know-Most important actor 0.005923081 -0.1603014 0.1721476 0.9997222
## Not an important actor-Somewhat important actor 0.012105947 -0.1828111 0.2070230 0.9985387
## Don't know-Somewhat important actor -0.027146596 -0.1836571 0.1293639 0.9701783
## Don't know-Not an important actor -0.039252543 -0.2435494 0.1650443 0.9601399
Opinions of importance vary slightly by changes in Cho policy scores. Respondents who indicated that the US was more important tended to work in countries with greater changes in TIP policy.
ggplot(df.importance, aes(x=Q3.19, y=change_policy)) +
geom_violin(fill="grey90") +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Change in TIP policy index") +
coord_flip() + theme_clean()
Variance is equal in all groups:
kruskal.test(change_policy ~ importance_factor, data=df.importance)
##
## Kruskal-Wallis rank sum test
##
## data: change_policy by importance_factor
## Kruskal-Wallis chi-squared = 4.7541, df = 3, p-value = 0.1907
ANOVA shows no differences
cho.change.anova <- aov(change_policy ~ importance_factor, data=df.importance)
summary(cho.change.anova) # (⌐○Ϟ○) ♥ \(•◡•)/
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 35 11.769 1.766 0.153
## Residuals 517 3445 6.663
## 1 observation deleted due to missingness
TukeyHSD(cho.change.anova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = change_policy ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor -0.3665885 -1.1169034 0.3837264 0.5893507
## Not an important actor-Most important actor -0.8270207 -1.8115725 0.1575310 0.1344747
## Don't know-Most important actor -0.4958620 -1.3028488 0.3111249 0.3888147
## Not an important actor-Somewhat important actor -0.4604322 -1.4067155 0.4858511 0.5926605
## Don't know-Somewhat important actor -0.1292735 -0.8891009 0.6305540 0.9717760
## Don't know-Not an important actor 0.3311588 -0.6606615 1.3229791 0.8251392
Organizations that have been received funding from the US are more likely to consider the US to play an important role in the countries they work in.
funding.table <- df.importance %>%
xtabs(~ Q3.19 + received.funding, .) %>% print
## received.funding
## Q3.19 FALSE TRUE
## Most important actor 81 58
## Somewhat important actor 149 33
## Not an important actor 65 3
## Don't know 131 2
There’s an overall significant difference (though two of the cells are really small here)
(funding.chi <- chisq.test(funding.table))
##
## Pearson's Chi-squared test
##
## data: funding.table
## X-squared = 84.566, df = 3, p-value < 2.2e-16
# Cramer's V for standardized measure of association
assocstats(funding.table)
## X^2 df P(> X^2)
## Likelihood Ratio 91.734 3 0
## Pearson 84.566 3 0
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.373
## Cramer's V : 0.402
# Components of chi-squared
(components <- funding.chi$residuals^2)
## received.funding
## Q3.19 FALSE TRUE
## Most important actor 9.275164424 41.158542132
## Somewhat important actor 0.001495267 0.006635247
## Not an important actor 1.628262816 7.225416244
## Don't know 4.647505115 20.623303950
round(1-pchisq(components, funding.chi$parameter), 3)
## received.funding
## Q3.19 FALSE TRUE
## Most important actor 0.026 0.000
## Somewhat important actor 1.000 1.000
## Not an important actor 0.653 0.065
## Don't know 0.200 0.000
# Visualize differences
mosaic(funding.table,
labeling_args=list(set_varnames=c(received.funding="Received US funding",
Q3.19="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
Opinions of importance are strongly associated with US TIP funding given to a country. Organizations are more likely to think the US is an important actor if they work in countries receiving more anti-TIP funding.
ggplot(df.importance, aes(x=Q3.19, y=log.total.funding)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Total TIP funding to country (logged)") +
scale_y_continuous(labels=trans_format("exp", dollar_format())) +
coord_flip() + theme_clean()
Variance is not equal in all groups:
kruskal.test(log.total.funding ~ importance_factor, data=df.importance)
##
## Kruskal-Wallis rank sum test
##
## data: log.total.funding by importance_factor
## Kruskal-Wallis chi-squared = 21.659, df = 3, p-value = 7.68e-05
Ratio is 3ish, which is below 4, so heterogenous variance is okayish:
df.importance %>% group_by(importance_factor) %>%
summarise(variance = var(log.total.funding, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 3.006739
ANOVA shows significant differences:
funding.anova <- aov(log.total.funding ~ importance_factor, data=df.importance)
summary(funding.anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 1234 411.2 11.92 1.48e-07 ***
## Residuals 508 17524 34.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 10 observations deleted due to missingness
(funding.pairs <- TukeyHSD(funding.anova))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log.total.funding ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor -1.241784 -2.974848 0.4912799 0.2526710
## Not an important actor-Most important actor -4.368679 -6.625553 -2.1118050 0.0000050
## Don't know-Most important actor -3.309266 -5.169210 -1.4493229 0.0000337
## Not an important actor-Somewhat important actor -3.126895 -5.283433 -0.9703573 0.0011831
## Don't know-Somewhat important actor -2.067482 -3.804308 -0.3306565 0.0121187
## Don't know-Not an important actor 1.059413 -1.200352 3.3191771 0.6217846
See those differences
funding.pairs.plot <- data.frame(funding.pairs$importance_factor) %>%
mutate(pair = row.names(.),
pair = factor(pair, levels=pair, ordered=TRUE),
stars = add.stars(p.adj))
fig.funding.pairs <- ggplot(funding.pairs.plot,
aes(x=pair, y=diff, ymax=upr, ymin=lwr)) +
geom_hline(yintercept=0) +
geom_text(aes(label=stars), nudge_x=0.25) +
geom_pointrange() +
theme_clean() + coord_flip()
fig.funding.pairs
US importance appears to be associated with the level of democracy in a country. NGOs working in countries with worse democracy (higher numbers of the total freedom scale) are more likely to see the US as the most important anti-TIP actor in that country. Or, rather, on average total freedom is worse in countries where NGOs indicate the US as the most important actor.
ggplot(df.importance, aes(x=Q3.19, y=total.freedom)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US",
y="Total freedom (political rights + civil liberties; higher is worse)") +
coord_flip() + theme_clean()
Variance is not equal in all groups:
kruskal.test(total.freedom ~ importance_factor, data=df.importance)
##
## Kruskal-Wallis rank sum test
##
## data: total.freedom by importance_factor
## Kruskal-Wallis chi-squared = 21.262, df = 3, p-value = 9.288e-05
Ratio between min and max variance is low, so we’re okay:
df.importance %>% group_by(importance_factor) %>%
summarise(variance = var(total.freedom, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 1.431773
ANOVA shows significant differences:
democracy.anova <- aov(total.freedom ~ importance_factor, data=df.importance)
summary(democracy.anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 208 69.24 6.486 0.000259 ***
## Residuals 512 5465 10.67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 6 observations deleted due to missingness
(democracy.pairs <- TukeyHSD(democracy.anova))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total.freedom ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor -0.8469545051 -1.804602 0.1106928 0.1041491
## Not an important actor-Most important actor -1.6001193070 -2.852380 -0.3478582 0.0058153
## Don't know-Most important actor -1.5999919833 -2.630782 -0.5692024 0.0004202
## Not an important actor-Somewhat important actor -0.7531648018 -1.950937 0.4446072 0.3677940
## Don't know-Somewhat important actor -0.7530374782 -1.716898 0.2108229 0.1842105
## Don't know-Not an important actor 0.0001273237 -1.256892 1.2571462 1.0000000
View the differences:
democracy.pairs.plot <- data.frame(democracy.pairs$importance_factor) %>%
mutate(pair = row.names(.),
pair = factor(pair, levels=pair, ordered=TRUE),
stars = add.stars(p.adj))
fig.democracy.pairs <- ggplot(democracy.pairs.plot,
aes(x=pair, y=diff, ymax=upr, ymin=lwr)) +
geom_hline(yintercept=0) +
geom_text(aes(label=stars), nudge_x=0.25) +
geom_pointrange() +
theme_clean() + coord_flip()
fig.democracy.pairs
The time NGOs spend on trafficking issues does not appear to be associated with their opinion of US importance.
ggplot(df.importance, aes(x=Q3.19, y=time.spent)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Time spent on trafficking issues") +
coord_flip() + theme_clean()
Variance is not equal in all groups:
kruskal.test(time.spent ~ importance_factor, data=df.importance)
##
## Kruskal-Wallis rank sum test
##
## data: time.spent by importance_factor
## Kruskal-Wallis chi-squared = 7.5031, df = 3, p-value = 0.05748
Ratio between min and max variance is low, so we’re okay:
df.importance %>% group_by(importance_factor) %>%
summarise(variance = var(time.spent, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 1.365997
ANOVA shows some small overall signifcant differences, but when decomposed, that effect is coming only from the tiny “Don’t know-Somewhat important actor” difference.
time.anova <- aov(time.spent ~ importance_factor, data=df.importance)
summary(time.anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor 3 9603 3201 2.9 0.0346 *
## Residuals 491 541945 1104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 27 observations deleted due to missingness
(time.pairs <- TukeyHSD(time.anova))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = time.spent ~ importance_factor, data = df.importance)
##
## $importance_factor
## diff lwr upr p adj
## Somewhat important actor-Most important actor 8.5081361 -1.357648 18.373920 0.1184138
## Not an important actor-Most important actor 7.6875000 -5.294752 20.669752 0.4223730
## Don't know-Most important actor -1.0297619 -11.619626 9.560102 0.9944664
## Not an important actor-Somewhat important actor -0.8206361 -13.390747 11.749474 0.9983036
## Don't know-Somewhat important actor -9.5378980 -19.618277 0.542481 0.0712221
## Don't know-Not an important actor -8.7172619 -21.863334 4.428810 0.3198290
Organizations that have been involved with the US are more likely to consider the US to play an important role in the countries they work in.
involvement.table <- df.importance %>%
xtabs(~ Q3.19 + us.involvement, .) %>% print
## us.involvement
## Q3.19 FALSE TRUE
## Most important actor 23 116
## Somewhat important actor 48 134
## Not an important actor 38 30
## Don't know 73 60
There’s an overall significant difference
(involvement.chi <- chisq.test(involvement.table))
##
## Pearson's Chi-squared test
##
## data: involvement.table
## X-squared = 63.022, df = 3, p-value = 1.328e-13
# Cramer's V for standardized measure of association
assocstats(involvement.table)
## X^2 df P(> X^2)
## Likelihood Ratio 63.914 3 8.5598e-14
## Pearson 63.022 3 1.3278e-13
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.328
## Cramer's V : 0.347
# Components of chi-squared
(components <- involvement.chi$residuals^2)
## us.involvement
## Q3.19 FALSE TRUE
## Most important actor 13.379010 7.161705
## Somewhat important actor 3.764597 2.015167
## Not an important actor 8.614436 4.611257
## Don't know 15.291006 8.185186
1-pchisq(components, involvement.chi$parameter)
## us.involvement
## Q3.19 FALSE TRUE
## Most important actor 0.003884710 0.066918431
## Somewhat important actor 0.288031028 0.569264781
## Not an important actor 0.034881684 0.202578528
## Don't know 0.001584118 0.042335569
# Visualize differences
mosaic(involvement.table,
labeling_args=list(set_varnames=c(us.involvement="US involvement",
Q3.19="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
NGOs with headquarters in the US are not significantly different from their foreign counterparts in their opinions of the importance of the US.
hq.table <- df.importance %>%
xtabs(~ Q3.19 + us.hq, .) %>% print
## us.hq
## Q3.19 FALSE TRUE
## Most important actor 129 10
## Somewhat important actor 171 11
## Not an important actor 59 9
## Don't know 115 18
There’s no overall significant difference
(hq.chi <- chisq.test(hq.table))
##
## Pearson's Chi-squared test
##
## data: hq.table
## X-squared = 7.1586, df = 3, p-value = 0.06701
# Cramer's V is really low
assocstats(hq.table)
## X^2 df P(> X^2)
## Likelihood Ratio 6.9902 3 0.072211
## Pearson 7.1586 3 0.067010
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.116
## Cramer's V : 0.117
# Components of chi-squared
(components <- hq.chi$residuals^2)
## us.hq
## Q3.19 FALSE TRUE
## Most important actor 0.06130129 0.60535020
## Somewhat important actor 0.19905971 1.96571460
## Not an important actor 0.12221951 1.20691768
## Don't know 0.27568266 2.72236626
1-pchisq(components, hq.chi$parameter)
## us.hq
## Q3.19 FALSE TRUE
## Most important actor 0.9960368 0.8952065
## Somewhat important actor 0.9777410 0.5795532
## Not an important actor 0.9890438 0.7513456
## Don't know 0.9645351 0.4364398
# Visualize differences
mosaic(hq.table,
labeling_args=list(set_varnames=c(us.involvement="US involvement",
Q3.19="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
Important caveat: Respondents were only asked about their opinions of the US’s work (Q3.25) if they indicated that the US was a somewhat important actor or the most important actor (Q3.19)
df.positivity<- responses.full %>%
select(Q3.25=Q3.25_collapsed, work.country, change_policy, avg_tier,
improve_tip, change_policy, importance, received.funding, us.involvement,
total.funding, total.freedom, us.hq, time.spent=Q2.1) %>%
filter(!is.na(Q3.25)) %>%
mutate(positivity_factor = factor(Q3.25, ordered=FALSE),
log.total.funding = log1p(total.funding),
time.spent = as.numeric(time.spent))
ggplot(df.positivity, aes(x=Q3.25, y=avg_tier)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05, show.legend=FALSE) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Average TIP tier rating") +
coord_flip() + theme_clean()
Check variance; it’s significant, so there’s heterogeneity
kruskal.test(avg_tier ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: avg_tier by positivity_factor
## Kruskal-Wallis chi-squared = 9.3081, df = 2, p-value = 0.009523
But the ratio is okay
df.positivity %>% group_by(positivity_factor) %>%
summarise(variance = var(avg_tier, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 2.479382
ANOVA is significant, but right on the threshold, mostly driven by difference between positive and mixed.
tier.anova.pos <- aov(avg_tier ~ positivity_factor, data=df.positivity)
summary(tier.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 1.11 0.5535 3.053 0.0486 *
## Residuals 310 56.20 0.1813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
(tier.pairs.pos <- TukeyHSD(tier.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = avg_tier ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know 0.17866608 -0.03023035 0.387562502 0.1104703
## Positive-Don't know 0.04045791 -0.14023074 0.221146556 0.8580130
## Positive-Mixed -0.13820817 -0.28114111 0.004724777 0.0605376
NGOs who have positive opinions of the US are more likely to work in countries where the TIP rating has (slightly) decreased on average between 2000 and 2015. This may be because assigning a worse TIP rating to a country represents increased US diplomatic and economic pressure—it is a possible sign that NGOs like scorecard diplomacy.
ggplot(df.positivity, aes(x=Q3.25, y=improve_tip)) +
geom_violin(fill="grey90") +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Improvement in TIP tier rating") +
coord_flip() + theme_clean()
Variance is not equal, but the ratio is small
kruskal.test(improve_tip ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: improve_tip by positivity_factor
## Kruskal-Wallis chi-squared = 12.159, df = 2, p-value = 0.002289
df.positivity %>% group_by(positivity_factor) %>%
summarise(variance = var(improve_tip, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 1.803968
ANOVA shows overall significant difference. The difference between positive and mixed is very significant
improve.anova.pos <- aov(improve_tip ~ positivity_factor, data=df.positivity)
summary(improve.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 5.47 2.7353 8.304 0.000307 ***
## Residuals 310 102.11 0.3294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
(improve.pairs.pos <- TukeyHSD(improve.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = improve_tip ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know 0.1328125 -0.1487706 0.41439559 0.5081302
## Positive-Don't know -0.1877934 -0.4313537 0.05576683 0.1660765
## Positive-Mixed -0.3206059 -0.5132732 -0.12793868 0.0003218
improve.pairs.plot <- data.frame(improve.pairs.pos$positivity_factor) %>%
mutate(pair = row.names(.),
pair = factor(pair, levels=pair, ordered=TRUE),
stars = add.stars(p.adj))
fig.improve.pairs.pos <- ggplot(improve.pairs.plot,
aes(x=pair, y=diff, ymax=upr, ymin=lwr)) +
geom_hline(yintercept=0) +
geom_text(aes(label=stars), nudge_x=0.25) +
geom_pointrange() +
theme_clean() + coord_flip()
fig.improve.pairs.pos
In contrast to the changes in actual TIP scores, NGOs that work in countries that show greater improvement in overall TIP policies are more likely to have a positive opinion of the US, perhaps because they are happy about the actual on-the-ground improvements.
ggplot(df.positivity, aes(x=Q3.25, y=change_policy)) +
geom_violin(fill="grey90") +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Change in TIP policy index") +
coord_flip() + theme_clean()
Variance is not equal; ratio is okay
kruskal.test(change_policy ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: change_policy by positivity_factor
## Kruskal-Wallis chi-squared = 14.1, df = 2, p-value = 0.0008676
df.positivity %>% group_by(positivity_factor) %>%
summarise(variance = var(change_policy, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 1.514886
ANOVA shows significant differences, mostly because of the difference between positive and mixed.
cho.change.anova.pos <- aov(change_policy ~ positivity_factor,
data=df.positivity)
summary(cho.change.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 97.3 48.64 7.409 0.000719 ***
## Residuals 310 2035.0 6.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
(cho.change.pairs.pos <- TukeyHSD(cho.change.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = change_policy ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know -0.4548611 -1.7119059 0.8021837 0.6708287
## Positive-Don't know 0.8748044 -0.2124987 1.9621074 0.1418826
## Positive-Mixed 1.3296655 0.4695593 2.1897717 0.0009270
cho.change.pairs.plot <- data.frame(cho.change.pairs.pos$positivity_factor) %>%
mutate(pair = row.names(.),
pair = factor(pair, levels=pair, ordered=TRUE),
stars = add.stars(p.adj))
fig.cho.change.pairs.pos <- ggplot(cho.change.pairs.plot,
aes(x=pair, y=diff, ymax=upr, ymin=lwr)) +
geom_hline(yintercept=0) +
geom_text(aes(label=stars), nudge_x=0.25) +
geom_pointrange() +
theme_clean() + coord_flip()
fig.cho.change.pairs.pos
funding.table.pos <- df.positivity %>%
xtabs(~ Q3.25 + received.funding, .) %>% print
## received.funding
## Q3.25 FALSE TRUE
## Don't know 26 10
## Mixed 52 12
## Positive 147 67
(funding.chi.pos <- chisq.test(funding.table.pos))
##
## Pearson's Chi-squared test
##
## data: funding.table.pos
## X-squared = 3.8321, df = 2, p-value = 0.1472
# Cramer's V for standardized measure of association
assocstats(funding.table.pos)
## X^2 df P(> X^2)
## Likelihood Ratio 4.0640 2 0.13107
## Pearson 3.8321 2 0.14719
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.11
## Cramer's V : 0.11
# Components of chi-squared
(components <- funding.chi.pos$residuals^2)
## received.funding
## Q3.25 FALSE TRUE
## Don't know 0.001610443 0.004071344
## Mixed 0.822094834 2.078329636
## Positive 0.262453717 0.663506589
round(1-pchisq(components, funding.chi.pos$parameter), 3)
## received.funding
## Q3.25 FALSE TRUE
## Don't know 0.999 0.998
## Mixed 0.663 0.354
## Positive 0.877 0.718
# Visualize differences
mosaic(funding.table.pos,
labeling_args=list(set_varnames=c(received.funding="Received US funding",
Q3.25="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
ggplot(df.positivity, aes(x=Q3.25, y=log.total.funding)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Total TIP funding to country (logged)") +
scale_y_continuous(labels=trans_format("exp", dollar_format())) +
coord_flip() + theme_clean()
Variance is not equal in all groups; ratio is 3.2ish, so it’s okayish
kruskal.test(log.total.funding ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: log.total.funding by positivity_factor
## Kruskal-Wallis chi-squared = 9.3808, df = 2, p-value = 0.009183
df.positivity %>% group_by(positivity_factor) %>%
summarise(variance = var(log.total.funding, na.rm=TRUE)) %>%
do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
##
## ratio
## (dbl)
## 1 3.246233
ANOVA shows significant differences, mostly between mixed and don’t know (not between positive and mixed)
funding.anova.pos <- aov(log.total.funding ~ positivity_factor, data=df.positivity)
summary(funding.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 243 121.50 4.826 0.00865 **
## Residuals 302 7603 25.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 9 observations deleted due to missingness
(funding.pairs.pos <- TukeyHSD(funding.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log.total.funding ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know 3.252743 0.7612851 5.7442008 0.0064809
## Positive-Don't know 1.818460 -0.3414378 3.9783587 0.1181484
## Positive-Mixed -1.434282 -3.1347613 0.2661964 0.1172422
ggplot(df.positivity, aes(x=Q3.25, y=total.freedom)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US",
y="Total freedom (political rights + civil liberties; higher is worse)") +
coord_flip() + theme_clean()
Variance is equal in all groups
kruskal.test(total.freedom ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: total.freedom by positivity_factor
## Kruskal-Wallis chi-squared = 3.477, df = 2, p-value = 0.1758
No significant differences
democracy.anova.pos <- aov(total.freedom ~ positivity_factor, data=df.positivity)
summary(democracy.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 30.8 15.39 1.524 0.219
## Residuals 306 3090.0 10.10
## 5 observations deleted due to missingness
(democracy.pairs.pos <- TukeyHSD(democracy.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = total.freedom ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know 0.5272109 -1.050583 2.1050049 0.7113264
## Positive-Don't know -0.2664990 -1.632446 1.0994480 0.8901982
## Positive-Mixed -0.7937099 -1.868207 0.2807874 0.1921411
The time NGOs spend on trafficking issues does not appear to be associated with their opinion of US importance.
ggplot(df.positivity, aes(x=Q3.25, y=time.spent)) +
geom_violin(fill="grey90") +
geom_point(alpha=0.05) +
geom_point(stat="summary", fun.y="mean", size=5) +
labs(x="Opinion of US", y="Time spent on trafficking issues") +
coord_flip() + theme_clean()
Variance is equal in all groups:
kruskal.test(time.spent ~ positivity_factor, data=df.positivity)
##
## Kruskal-Wallis rank sum test
##
## data: time.spent by positivity_factor
## Kruskal-Wallis chi-squared = 0.25432, df = 2, p-value = 0.8806
No significant differences between anything
time.anova.pos <- aov(time.spent ~ positivity_factor, data=df.positivity)
summary(time.anova.pos)
## Df Sum Sq Mean Sq F value Pr(>F)
## positivity_factor 2 164 81.9 0.081 0.923
## Residuals 296 300856 1016.4
## 15 observations deleted due to missingness
(time.pairs.pos <- TukeyHSD(time.anova.pos))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = time.spent ~ positivity_factor, data = df.positivity)
##
## $positivity_factor
## diff lwr upr p adj
## Mixed-Don't know -1.7011710 -17.625650 14.22331 0.9656996
## Positive-Don't know 0.1546798 -13.590012 13.89937 0.9996126
## Positive-Mixed 1.8558508 -9.109375 12.82108 0.9161508
involvement.table.pos <- df.positivity %>%
xtabs(~ Q3.25 + us.involvement, .) %>% print
## us.involvement
## Q3.25 FALSE TRUE
## Don't know 8 28
## Mixed 16 48
## Positive 45 169
There’s no significant difference
(involvement.chi.pos <- chisq.test(involvement.table.pos))
##
## Pearson's Chi-squared test
##
## data: involvement.table.pos
## X-squared = 0.45477, df = 2, p-value = 0.7966
# Tiny Cramer's V
assocstats(involvement.table.pos)
## X^2 df P(> X^2)
## Likelihood Ratio 0.44548 2 0.80032
## Pearson 0.45477 2 0.79661
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.038
## Cramer's V : 0.038
# Components of chi-squared
(components <- involvement.chi.pos$residuals^2)
## us.involvement
## Q3.25 FALSE TRUE
## Don't know 0.0010051591 0.0002830856
## Mixed 0.2665928182 0.0750812427
## Positive 0.0872412178 0.0245699756
1-pchisq(components, involvement.chi.pos$parameter)
## us.involvement
## Q3.25 FALSE TRUE
## Don't know 0.9994975 0.9998585
## Mixed 0.8752056 0.9631553
## Positive 0.9573171 0.9877902
# Visualize differences
mosaic(involvement.table.pos,
labeling_args=list(set_varnames=c(us.involvement="US involvement",
Q3.25="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
NGOs with headquarters in the US are not significantly different from their foreign counterparts in their opinions of the US in general.
hq.table.pos <- df.positivity %>%
xtabs(~ Q3.25 + us.hq, .) %>% print
## us.hq
## Q3.25 FALSE TRUE
## Don't know 33 3
## Mixed 59 5
## Positive 201 13
There’s no overall significant difference, but some of the cells are too small
(hq.chi.pos <- chisq.test(hq.table.pos))
##
## Pearson's Chi-squared test
##
## data: hq.table.pos
## X-squared = 0.4148, df = 2, p-value = 0.8127
# Cramer's V is really, really low
assocstats(hq.table.pos)
## X^2 df P(> X^2)
## Likelihood Ratio 0.4018 2 0.81800
## Pearson 0.4148 2 0.81269
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.036
## Cramer's V : 0.036
# Components of chi-squared
(components <- hq.chi.pos$residuals^2)
## us.hq
## Q3.25 FALSE TRUE
## Don't know 0.010445425 0.145738550
## Mixed 0.008674404 0.121028587
## Positive 0.008621511 0.120290607
1-pchisq(components, hq.chi.pos$parameter)
## us.hq
## Q3.25 FALSE TRUE
## Don't know 0.9947909 0.9297224
## Mixed 0.9956722 0.9412803
## Positive 0.9956985 0.9416277
# Visualize differences
mosaic(hq.table.pos,
labeling_args=list(set_varnames=c(us.involvement="US involvement",
Q3.25="Opinion of US"),
gp_labels=(gpar(fontsize=8))),
gp_varnames=gpar(fontsize=10, fontface=2))
Does opinion of the US vary by: * Whether an NGO focuses on certain types of work?
In which countries does the US seem to have had more collaboration with NGOs?
CHECK: Opinions are not driven by cooptation - look at chapter 1 for boomerang type stuff - cooptation by donors - so in this case, the NGOs aren’t just being bought out?
Drop “don’t know”s - that leads to a t test for positivity
Create a nice simple summary table (like the one from Mike Ward’s paper) with graphs
# # Type of work
# # TODO: work.country is not the most reliable identifier---there be NAs
# # Q2.2_X
# asdf <- responses.full %>%
# select(survey.id, matches("Q2.2_\\d$")) %>%
# gather(type_of_work, value, -survey.id) %>%
# mutate(type_of_work = factor(type_of_work, levels=paste0("Q2.2_", seq(1:4)),
# labels=c("Organs", "Sex", "Labor", "Other"),
# ordered=TRUE))
# asdf %>%
# group_by(type_of_work) %>%
# summarise(bloop = n(),
# derp = sum(value, na.rm=TRUE),
# asdf = derp / n())
# # Should be 30, 408, 294, 116 with 479 total?
# #' Does NGO experience on the ground match improvements reported by the State Department?
# #' * Find country averages of government improvement, etc. - then show that X
# #' number of countries show improvement, etc.
# #' * Report by organization and by country - how many countries has the US had
# #' a positive influence + how many NGOs say the US has had a positive
# #' influence
# full <- left_join(country.indexes, tip.change,
# by=c("work.country" = "countryname")) %>%
# filter(num.responses >= 10) %>%
# mutate(country_label = ifelse(num.responses >= 10, work.country, ""))
# ggplot(full, aes(x=improvement_score, y=change_policy, label=work.country)) +
# geom_point() + geom_text(vjust=1.5) +
# geom_hline(yintercept=0) +
# scale_x_continuous(limits=c(0, 1)) +
# scale_y_continuous(limits=c(-2, 6))
# ggplot(country.indexes, aes(x=work.country, y=improvement_score)) +
# geom_bar(stat="identity") +
# coord_flip()
# #' Compare improvement scores with actual changes in TIP score to get a sense
# #' of if NGO experiences reflect changes in rankings
# ggplot(country.indexes, aes(x=work.country, y=positivity_score)) +
# geom_bar(stat="identity") +
# coord_flip()
# # Importance opinions
# importance.opinions <- responses.all %>%
# filter(Q3.19 == "Not an important actor") %>%
# select(survey.id, clean.id, Q3.19, contains("TEXT"), Q4.1)
# responses.all$Q3.19 %>%
# table %>% print %>% prop.table
# responses.countries %>%
# xtabs(~ Q3.25 + Q3.26, .)
# ggplot(responses.orgs, aes(x = Q1.5.factor)) + geom_bar() +
# labs(x = Hmisc::label(responses.orgs$Q1.5))
# # Importance of US
# asdf <- responses.all %>%
# select(clean.id, Q1.2, Q3.8, Q3.6, Q3.7)
# inconsistent.no <- c(1020, 1152, 1226, 1267, 1323, 1405, 1515)
# inconsistent.dont.know <- c(1051, 1512)
# qwer <- asdf %>%
# mutate(us.active = ifelse(clean.id %in% c(inconsistent.no, inconsistent.dont.know),
# "Yes", as.character(Q3.8)))
# qwer$us.active %>% table %>% print %>% prop.table
# sdfg <- qwer %>% group_by(clean.id) %>%
# summarize(said.no = ifelse(any(us.active == "No", na.rm=TRUE), TRUE, FALSE))
# sdfg$said.no %>% table %>% print %>% prop.table
# # US importance and positivity
# # Importance of report
# responses.orgs$Q2.5 %>% table %>% print %>% prop.table
# responses.countries$Q3.23 %>% table %>% print %>% prop.table
# heard.of.tip <- responses.countries %>%
# left_join(responses.orgs, by="survey.id") %>%
# filter(Q2.5 == "Yes") %>%
# group_by(survey.id) %>%
# mutate(know.score = ifelse(Q3.22 == "Don't know", FALSE, TRUE)) %>%
# select(know.score) %>% unique
# heard.of.tip$know.score %>% table %>% print %>% prop.table
# # Opinions of report
# opinions <- responses.all %>%
# select(clean.id, Q1.2, home.country, work.country, Q3.21_1, Q3.21_4_TEXT, Q3.24.Text)
# not.used.tip.ids <- c(1094, 1099, 1106, 1114, 1157, 1221, 1244, 1269,
# 1314, 1330, 1354, 1357, 1393, 1425)
# not.used.tip <- responses.all %>%
# mutate(no.response = ifelse(is.na(Q3.21_1) & is.na(Q3.21_2) &
# is.na(Q3.21_3) & is.na(Q3.21_4), TRUE, FALSE),
# explicit.no = ifelse(clean.id %in% not.used.tip.ids, TRUE, FALSE)) %>%
# select(clean.id, Q1.2, Q3.21_1, Q3.21_2, Q3.21_3, Q3.21_4, no.response, explicit.no) %>%
# group_by(clean.id) %>%
# summarize(no.response = ifelse(sum(no.response) > 0, TRUE, FALSE),
# explicit.no = ifelse(sum(explicit.no) > 0, TRUE, FALSE))
# not.used.tip$explicit.no %>% table